{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction import DictVectorizer\n",
    "from sklearn import tree  # 决策树\n",
    "from sklearn import preprocessing\n",
    "import csv # 读取的方式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['RID', 'age', 'income', 'student', 'credit_rating', 'class_buys_computer']\n",
      "#####1#########\n",
      "[{'age': 'youth', 'income': 'high', 'student': 'no', 'credit_rating': 'fair'}, {'age': 'youth', 'income': 'high', 'student': 'no', 'credit_rating': 'excellent'}, {'age': 'middle_aged', 'income': 'high', 'student': 'no', 'credit_rating': 'fair'}, {'age': 'senior', 'income': 'medium', 'student': 'no', 'credit_rating': 'fair'}, {'age': 'senior', 'income': 'low', 'student': 'yes', 'credit_rating': 'fair'}, {'age': 'senior', 'income': 'low', 'student': 'yes', 'credit_rating': 'excellent'}, {'age': 'middle_aged', 'income': 'low', 'student': 'yes', 'credit_rating': 'excellent'}, {'age': 'youth', 'income': 'medium', 'student': 'no', 'credit_rating': 'fair'}, {'age': 'youth', 'income': 'low', 'student': 'yes', 'credit_rating': 'fair'}, {'age': 'senior', 'income': 'medium', 'student': 'yes', 'credit_rating': 'fair'}, {'age': 'youth', 'income': 'medium', 'student': 'yes', 'credit_rating': 'excellent'}, {'age': 'middle_aged', 'income': 'medium', 'student': 'no', 'credit_rating': 'excellent'}, {'age': 'middle_aged', 'income': 'high', 'student': 'yes', 'credit_rating': 'fair'}, {'age': 'senior', 'income': 'medium', 'student': 'no', 'credit_rating': 'excellent'}]\n",
      "14\n"
     ]
    }
   ],
   "source": [
    "# 读入数据\n",
    "Dtree = open(r'AllElectronics.csv', 'r')  # 专门读取csv。因为数据含字符。这里其实就是预处理\n",
    "reader = csv.reader(Dtree)\n",
    "\n",
    "# 获取第一行数据\n",
    "headers = reader.__next__()  #读取第1行，表头里所有字符\n",
    "print(headers)\n",
    "print('#####1#########')\n",
    "\n",
    "# 定义两个列表,特征和标签\n",
    "featureList = []\n",
    "labelList = []\n",
    "# print(len(row))  #  6列,不能单独使用row的定义在后面\n",
    "\n",
    "# 由于前面读取1行，有指针所有下面的循环从第2行开始\n",
    "for row in reader:  # \n",
    "    # 把label存入list\n",
    "    labelList.append(row[-1]) # 把最后一列逐行存到labellist\n",
    "    rowDict = {}\n",
    "    for i in range(1, len(row)-1):  # 列循环，不包括yes，no的列，range(1,5)注意是1到4列不包括最后1列\n",
    "        #建立一个数据字典\n",
    "        rowDict[headers[i]] = row[i]\n",
    "    # 把数据字典存入list\n",
    "    featureList.append(rowDict) # 将表头和特征对应保存\n",
    "\n",
    "print(featureList)\n",
    "print(len(featureList)) # 就是行数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_data: [[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]\n",
      " [0. 0. 1. 1. 0. 1. 0. 0. 1. 0.]\n",
      " [1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]\n",
      " [0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]\n",
      " [0. 1. 0. 0. 1. 0. 1. 0. 0. 1.]\n",
      " [0. 1. 0. 1. 0. 0. 1. 0. 0. 1.]\n",
      " [1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]\n",
      " [0. 0. 1. 0. 1. 0. 0. 1. 1. 0.]\n",
      " [0. 0. 1. 0. 1. 0. 1. 0. 0. 1.]\n",
      " [0. 1. 0. 0. 1. 0. 0. 1. 0. 1.]\n",
      " [0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]\n",
      " [1. 0. 0. 1. 0. 0. 0. 1. 1. 0.]\n",
      " [1. 0. 0. 0. 1. 1. 0. 0. 0. 1.]\n",
      " [0. 1. 0. 1. 0. 0. 0. 1. 1. 0.]]\n",
      "['age=middle_aged', 'age=senior', 'age=youth', 'credit_rating=excellent', 'credit_rating=fair', 'income=high', 'income=low', 'income=medium', 'student=no', 'student=yes']\n",
      "labelList: ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']\n",
      "y_data: [[0]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [0]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [1]\n",
      " [0]]\n"
     ]
    }
   ],
   "source": [
    "# 把数据转换成01表示\n",
    "vec = DictVectorizer()\n",
    "x_data = vec.fit_transform(featureList).toarray()\n",
    "print(\"x_data: \" + str(x_data))\n",
    "# print('&&&&')\n",
    "# print(x_data)\n",
    "\n",
    "# 打印属性名称\n",
    "# 如年龄有中老轻001，信用等级好不好01，。。。\n",
    "print(vec.get_feature_names()) # 按这10个特征转为1或0，上面10列\n",
    "\n",
    "# 打印标签\n",
    "print(\"labelList: \" + str(labelList))\n",
    "\n",
    "# 把标签转换成01表示\n",
    "lb = preprocessing.LabelBinarizer()\n",
    "y_data = lb.fit_transform(labelList)\n",
    "print(\"y_data: \" + str(y_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,\n",
       "            max_features=None, max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
       "            splitter='best')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 创建决策树模型，DecisionTreeClassifier决策树分类器，\n",
    "# 默认是criterion='gini'是CART算法，这里是改为信息熵\n",
    "model = tree.DecisionTreeClassifier(criterion='entropy')\n",
    "\n",
    "# 输入数据建立模型\n",
    "model.fit(x_data, y_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_test: [0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]\n",
      "predict: [0]\n"
     ]
    }
   ],
   "source": [
    "# 测试\n",
    "x_test = x_data[0]\n",
    "print(\"x_test: \" + str(x_test))\n",
    "\n",
    "predict = model.predict(x_test.reshape(1,-1))  # 变成2维数据\n",
    "print(\"predict: \" + str(predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'computer.pdf'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 导出决策树  ，这里需要pip下面的包，安装网站的软件，并把软件加入环境变量中去\n",
    "# pip install graphviz\n",
    "# http://www.graphviz.org/\n",
    "import graphviz \n",
    "\n",
    "#  feature_names = vec.get_feature_names(),特征名字，一个list\n",
    "# class_names = lb.classes_,标签yes和no\n",
    "dot_data = tree.export_graphviz(model, \n",
    "                                out_file = None, \n",
    "                                feature_names = vec.get_feature_names(), # 也可自己取名字=[,,,,]\n",
    "                                class_names = lb.classes_,\n",
    "                                filled = True,\n",
    "                                rounded = True,\n",
    "                                special_characters = True)\n",
    "graph = graphviz.Source(dot_data)\n",
    "graph.render('computer')  #多了一个pdf文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: Tree Pages: 1 -->\r\n",
       "<svg width=\"632pt\" height=\"552pt\"\r\n",
       " viewBox=\"0.00 0.00 632.00 552.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 548)\">\r\n",
       "<title>Tree</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-548 628,-548 628,4 -4,4\"/>\r\n",
       "<!-- 0 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>0</title>\r\n",
       "<path fill=\"#399de5\" fill-opacity=\"0.443137\" stroke=\"black\" d=\"M464.5,-544C464.5,-544 314.5,-544 314.5,-544 308.5,-544 302.5,-538 302.5,-532 302.5,-532 302.5,-473 302.5,-473 302.5,-467 308.5,-461 314.5,-461 314.5,-461 464.5,-461 464.5,-461 470.5,-461 476.5,-467 476.5,-473 476.5,-473 476.5,-532 476.5,-532 476.5,-538 470.5,-544 464.5,-544\"/>\r\n",
       "<text text-anchor=\"start\" x=\"310.5\" y=\"-528.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">age=middle_aged ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"342.5\" y=\"-513.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.94</text>\r\n",
       "<text text-anchor=\"start\" x=\"345.5\" y=\"-498.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 14</text>\r\n",
       "<text text-anchor=\"start\" x=\"348\" y=\"-483.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [5, 9]</text>\r\n",
       "<text text-anchor=\"start\" x=\"353\" y=\"-468.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 1 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>1</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M378,-425C378,-425 265,-425 265,-425 259,-425 253,-419 253,-413 253,-413 253,-354 253,-354 253,-348 259,-342 265,-342 265,-342 378,-342 378,-342 384,-342 390,-348 390,-354 390,-354 390,-413 390,-413 390,-419 384,-425 378,-425\"/>\r\n",
       "<text text-anchor=\"start\" x=\"261\" y=\"-409.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">student=yes ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"279\" y=\"-394.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"277.5\" y=\"-379.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 10</text>\r\n",
       "<text text-anchor=\"start\" x=\"280\" y=\"-364.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [5, 5]</text>\r\n",
       "<text text-anchor=\"start\" x=\"288\" y=\"-349.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 0&#45;&gt;1 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>0&#45;&gt;1</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M365.908,-460.907C360.844,-452.195 355.44,-442.897 350.207,-433.893\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"353.102,-431.908 345.051,-425.021 347.049,-435.425 353.102,-431.908\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"338.65\" y=\"-445.487\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\r\n",
       "</g>\r\n",
       "<!-- 12 -->\r\n",
       "<g id=\"node13\" class=\"node\"><title>12</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M497,-417.5C497,-417.5 420,-417.5 420,-417.5 414,-417.5 408,-411.5 408,-405.5 408,-405.5 408,-361.5 408,-361.5 408,-355.5 414,-349.5 420,-349.5 420,-349.5 497,-349.5 497,-349.5 503,-349.5 509,-355.5 509,-361.5 509,-361.5 509,-405.5 509,-405.5 509,-411.5 503,-417.5 497,-417.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"416\" y=\"-402.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"419\" y=\"-387.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 4</text>\r\n",
       "<text text-anchor=\"start\" x=\"417\" y=\"-372.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 4]</text>\r\n",
       "<text text-anchor=\"start\" x=\"422\" y=\"-357.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 0&#45;&gt;12 -->\r\n",
       "<g id=\"edge12\" class=\"edge\"><title>0&#45;&gt;12</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M413.439,-460.907C420.012,-449.763 427.15,-437.658 433.767,-426.439\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"436.875,-428.059 438.94,-417.667 430.845,-424.503 436.875,-428.059\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"445.187\" y=\"-438.174\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\r\n",
       "</g>\r\n",
       "<!-- 2 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>2</title>\r\n",
       "<path fill=\"#e58139\" fill-opacity=\"0.749020\" stroke=\"black\" d=\"M273.5,-306C273.5,-306 167.5,-306 167.5,-306 161.5,-306 155.5,-300 155.5,-294 155.5,-294 155.5,-235 155.5,-235 155.5,-229 161.5,-223 167.5,-223 167.5,-223 273.5,-223 273.5,-223 279.5,-223 285.5,-229 285.5,-235 285.5,-235 285.5,-294 285.5,-294 285.5,-300 279.5,-306 273.5,-306\"/>\r\n",
       "<text text-anchor=\"start\" x=\"163.5\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">age=senior ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"169.5\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.722</text>\r\n",
       "<text text-anchor=\"start\" x=\"181\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 5</text>\r\n",
       "<text text-anchor=\"start\" x=\"179\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [4, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"187\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 1&#45;&gt;2 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>1&#45;&gt;2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M286.459,-341.907C278.624,-332.832 270.241,-323.121 262.169,-313.769\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"264.663,-311.303 255.48,-306.021 259.365,-315.878 264.663,-311.303\"/>\r\n",
       "</g>\r\n",
       "<!-- 7 -->\r\n",
       "<g id=\"node8\" class=\"node\"><title>7</title>\r\n",
       "<path fill=\"#399de5\" fill-opacity=\"0.749020\" stroke=\"black\" d=\"M492.5,-306C492.5,-306 352.5,-306 352.5,-306 346.5,-306 340.5,-300 340.5,-294 340.5,-294 340.5,-235 340.5,-235 340.5,-229 346.5,-223 352.5,-223 352.5,-223 492.5,-223 492.5,-223 498.5,-223 504.5,-229 504.5,-235 504.5,-235 504.5,-294 504.5,-294 504.5,-300 498.5,-306 492.5,-306\"/>\r\n",
       "<text text-anchor=\"start\" x=\"348.5\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">credit_rating=fair ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"371.5\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.722</text>\r\n",
       "<text text-anchor=\"start\" x=\"383\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 5</text>\r\n",
       "<text text-anchor=\"start\" x=\"381\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 4]</text>\r\n",
       "<text text-anchor=\"start\" x=\"386\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 1&#45;&gt;7 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>1&#45;&gt;7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M356.541,-341.907C364.376,-332.832 372.759,-323.121 380.831,-313.769\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"383.635,-315.878 387.52,-306.021 378.337,-311.303 383.635,-315.878\"/>\r\n",
       "</g>\r\n",
       "<!-- 3 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>3</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M89,-179.5C89,-179.5 12,-179.5 12,-179.5 6,-179.5 0,-173.5 0,-167.5 0,-167.5 0,-123.5 0,-123.5 0,-117.5 6,-111.5 12,-111.5 12,-111.5 89,-111.5 89,-111.5 95,-111.5 101,-117.5 101,-123.5 101,-123.5 101,-167.5 101,-167.5 101,-173.5 95,-179.5 89,-179.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"8\" y=\"-164.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"11\" y=\"-149.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 3</text>\r\n",
       "<text text-anchor=\"start\" x=\"9\" y=\"-134.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [3, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"17\" y=\"-119.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;3 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>2&#45;&gt;3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M161.519,-222.907C143.883,-210.769 124.593,-197.493 107.112,-185.462\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"108.913,-182.453 98.6913,-179.667 104.945,-188.22 108.913,-182.453\"/>\r\n",
       "</g>\r\n",
       "<!-- 4 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M310,-187C310,-187 131,-187 131,-187 125,-187 119,-181 119,-175 119,-175 119,-116 119,-116 119,-110 125,-104 131,-104 131,-104 310,-104 310,-104 316,-104 322,-110 322,-116 322,-116 322,-175 322,-175 322,-181 316,-187 310,-187\"/>\r\n",
       "<text text-anchor=\"start\" x=\"127\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">credit_rating=excellent ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"178\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"181\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 2</text>\r\n",
       "<text text-anchor=\"start\" x=\"179\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"187\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;4 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>2&#45;&gt;4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M220.5,-222.907C220.5,-214.649 220.5,-205.864 220.5,-197.302\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"224,-197.021 220.5,-187.021 217,-197.021 224,-197.021\"/>\r\n",
       "</g>\r\n",
       "<!-- 5 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>5</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M161,-68C161,-68 84,-68 84,-68 78,-68 72,-62 72,-56 72,-56 72,-12 72,-12 72,-6 78,-0 84,-0 84,-0 161,-0 161,-0 167,-0 173,-6 173,-12 173,-12 173,-56 173,-56 173,-62 167,-68 161,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"80\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"83\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"81\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"86\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 4&#45;&gt;5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>4&#45;&gt;5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M184.008,-103.726C175.845,-94.6054 167.186,-84.93 159.022,-75.8078\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"161.579,-73.417 152.302,-68.2996 156.363,-78.0853 161.579,-73.417\"/>\r\n",
       "</g>\r\n",
       "<!-- 6 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>6</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M280,-68C280,-68 203,-68 203,-68 197,-68 191,-62 191,-56 191,-56 191,-12 191,-12 191,-6 197,-0 203,-0 203,-0 280,-0 280,-0 286,-0 292,-6 292,-12 292,-12 292,-56 292,-56 292,-62 286,-68 280,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"199\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"202\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"200\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"208\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 4&#45;&gt;6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>4&#45;&gt;6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M228.32,-103.726C229.911,-95.4263 231.591,-86.6671 233.199,-78.2834\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"236.668,-78.7799 235.114,-68.2996 229.793,-77.4614 236.668,-78.7799\"/>\r\n",
       "</g>\r\n",
       "<!-- 8 -->\r\n",
       "<g id=\"node9\" class=\"node\"><title>8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M492.5,-187C492.5,-187 352.5,-187 352.5,-187 346.5,-187 340.5,-181 340.5,-175 340.5,-175 340.5,-116 340.5,-116 340.5,-110 346.5,-104 352.5,-104 352.5,-104 492.5,-104 492.5,-104 498.5,-104 504.5,-110 504.5,-116 504.5,-116 504.5,-175 504.5,-175 504.5,-181 498.5,-187 492.5,-187\"/>\r\n",
       "<text text-anchor=\"start\" x=\"348.5\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">income=medium ≤ 0.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"380\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"383\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 2</text>\r\n",
       "<text text-anchor=\"start\" x=\"381\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"389\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 7&#45;&gt;8 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>7&#45;&gt;8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M422.5,-222.907C422.5,-214.649 422.5,-205.864 422.5,-197.302\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"426,-197.021 422.5,-187.021 419,-197.021 426,-197.021\"/>\r\n",
       "</g>\r\n",
       "<!-- 11 -->\r\n",
       "<g id=\"node12\" class=\"node\"><title>11</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M612,-179.5C612,-179.5 535,-179.5 535,-179.5 529,-179.5 523,-173.5 523,-167.5 523,-167.5 523,-123.5 523,-123.5 523,-117.5 529,-111.5 535,-111.5 535,-111.5 612,-111.5 612,-111.5 618,-111.5 624,-117.5 624,-123.5 624,-123.5 624,-167.5 624,-167.5 624,-173.5 618,-179.5 612,-179.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"531\" y=\"-164.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"534\" y=\"-149.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 3</text>\r\n",
       "<text text-anchor=\"start\" x=\"532\" y=\"-134.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 3]</text>\r\n",
       "<text text-anchor=\"start\" x=\"537\" y=\"-119.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 7&#45;&gt;11 -->\r\n",
       "<g id=\"edge11\" class=\"edge\"><title>7&#45;&gt;11</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M474.889,-222.907C490.269,-210.99 507.066,-197.976 522.367,-186.12\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"524.934,-188.559 530.695,-179.667 520.646,-183.025 524.934,-188.559\"/>\r\n",
       "</g>\r\n",
       "<!-- 9 -->\r\n",
       "<g id=\"node10\" class=\"node\"><title>9</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M440,-68C440,-68 363,-68 363,-68 357,-68 351,-62 351,-56 351,-56 351,-12 351,-12 351,-6 357,-0 363,-0 363,-0 440,-0 440,-0 446,-0 452,-6 452,-12 452,-12 452,-56 452,-56 452,-62 446,-68 440,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"359\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"362\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"360\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"368\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = no</text>\r\n",
       "</g>\r\n",
       "<!-- 8&#45;&gt;9 -->\r\n",
       "<g id=\"edge9\" class=\"edge\"><title>8&#45;&gt;9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M414.68,-103.726C413.089,-95.4263 411.409,-86.6671 409.801,-78.2834\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"413.207,-77.4614 407.886,-68.2996 406.332,-78.7799 413.207,-77.4614\"/>\r\n",
       "</g>\r\n",
       "<!-- 10 -->\r\n",
       "<g id=\"node11\" class=\"node\"><title>10</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M559,-68C559,-68 482,-68 482,-68 476,-68 470,-62 470,-56 470,-56 470,-12 470,-12 470,-6 476,-0 482,-0 482,-0 559,-0 559,-0 565,-0 571,-6 571,-12 571,-12 571,-56 571,-56 571,-62 565,-68 559,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"478\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"481\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"479\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"484\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = yes</text>\r\n",
       "</g>\r\n",
       "<!-- 8&#45;&gt;10 -->\r\n",
       "<g id=\"edge10\" class=\"edge\"><title>8&#45;&gt;10</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M458.992,-103.726C467.155,-94.6054 475.814,-84.93 483.978,-75.8078\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"486.637,-78.0853 490.698,-68.2996 481.421,-73.417 486.637,-78.0853\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ],
      "text/plain": [
       "<graphviz.files.Source at 0x19e823ee3c8>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "graph  # entropy是交叉熵，value = [5, 9]左边no，右边yes，samples = 14指14个样本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['age=middle_aged',\n",
       " 'age=senior',\n",
       " 'age=youth',\n",
       " 'credit_rating=excellent',\n",
       " 'credit_rating=fair',\n",
       " 'income=high',\n",
       " 'income=low',\n",
       " 'income=medium',\n",
       " 'student=no',\n",
       " 'student=yes']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec.get_feature_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['no', 'yes'], dtype='<U3')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lb.classes_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
